--- title: Long file date: 2019-07-19 00:00:00 categories: - RNA-seq author_staff_member: jane-doe image: "https://unsplash.it/600/450?image=448&a=.png" large_header: false ---
library(sva)
library(RUVnormalize)
library(pheatmap)
library(edgeR)
library(GenomicFeatures)
library(mclust)
library(parallel)
library(ggplot2)
library(reshape2)
library(dplyr)
library(data.table)
library(DT)
library(RColorBrewer)
library(knitr)
library(ggrepel)
library(gridExtra)
library(grid)
library(plyr)
library(plotly)
library(Hmisc)
Dataset includes: “frontal cortex”, “Extracted frontal cortex only”, and “Forebrain” samples
### genreate rn5 annotation data ###
if (exists("exonic.gene.sizes")==F) {
txdb <- makeTxDbFromGFF("G:/Shared drives/Nord Lab - Data/MIAx01/RNA/Rattus_norvegicus.Rnor_6.0.90.gtf",format="gtf")
exons.list.per.gene <- exonsBy(txdb,by="gene")
exonic.gene.sizes <- lapply(exons.list.per.gene,function(x){sum(width(reduce(x)))})
}
setwd("G:/Shared drives/Nord Lab - Data/MIAx01/RNA")
min.cpm <- 5
cpm.sample.cutoff <- 2
# load count tables
my.data.1 <- read.table("Project_ANIZ_L1_H1144P_Zdilar_GENE_MATRIX.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
rownames(my.data.1) <- as.character(my.data.1[,1])
my.data.1 <- my.data.1[,c(1:9)]
my.data.2 <- read.table("Project_ANIZ_L5_H1145P_Zdilar_GENE_MATRIX.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
rownames(my.data.2) <- as.character(my.data.2[,1])
my.data.2 <- my.data.2[,c(1:8)]
#This is reading the new dataset
setwd("G:/Shared drives/Nord Lab - Data/MIAx01/")
my.data.3 <- read.table("Project_ANIZ_H1376P_Zdilar_GENE_MATRIX.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
rownames(my.data.3) <- as.character(my.data.3[,1])
setwd("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/Lane 3 of 3")
my.data.4 <- read.table("GENE_MATRIX.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
rownames(my.data.4) <- as.character(my.data.4[,1])
#file making my.data.3 is identical to th file making my.data.4. I copied the my.data.4 file from the cluster today, as directed by Rinaldo.
#Let's merge those files together
my.data <- merge(my.data.1, my.data.2, by="gene_id", all.x=T, all.y=T)
my.data <- merge(my.data, my.data.3, by="gene_id", all.x=T, all.y=T)
rownames(my.data) <- as.character(my.data[,1])
#dim(my.data)
#colnames(my.data)
#I'll quickly check the older gene matrix files and compare the colnames with the new version
setwd("G:/Shared drives/Nord Lab - Data/MIAx01/RNA")
test.file <- read.table("MAA_GENE_MATRIX.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
#dim(test.file)
#colnames(test.file)
#Great, test.file contains an older version of this analysis.
setwd("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/RNA")
# details on each sample
sample.info <- read.table("MAA_SampleInfo_complete.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
#In addition to frontal cortex I'm adding Forebrain samples. There are no frontal cortex samples in the new batch. I had to manually add 0s to the outlier columns, because I haven't found the original information in metafiles yet.
#There are the following brain regions in the dataset:
#[1] "frontal cortex" "whole brain"
#[3] "Extracted frontal cortex only" "Forebrain"
#Consult Alex about that.
#dim(sample.info)
#head(my.data)
# select samples for analysis
use.samples <- sample.info[which(sample.info[,"Region"] %in% c("frontal cortex", "Forebrain", "Extracted frontal cortex only") & sample.info[,"Outlier"]==0),"Sample_ID"]
use.samples.rows <- which(sample.info[,"Region"] %in% c("frontal cortex", "Forebrain", "Extracted frontal cortex only") & sample.info[,"Outlier"]==0)
# run analysis
exp.data <- my.data
### sanity check???
#head(exp.data)
#write.table(my.data, "MAA_GeneCounts.txt", sep="/t", col.names = T, row.names = F, quote=F)
rownames(exp.data) <- as.character(exp.data[,1])
exp.data <- exp.data[,-1]
exp.data <- exp.data[,use.samples] # selects data by column names
# merge symbol to ENS ID and run
ENS.to.Symbol <- read.table("Rn6.ENSEMBL.Symbol.txt", sep="\t", header=T, as.is = T, stringsAsFactors = F, comment.char = "#", quote = "")
#Some reformating was needed
colnames(ENS.to.Symbol)[3] <- "gene_name"
my.data$gene_name <- rownames(my.data)
my.data.symbol <- merge(my.data, ENS.to.Symbol[,c(1,3)], by="gene_name")
my.data.symbol$gene_name <- NULL
#head(my.data.symbol)
my.data.symbol <- my.data.symbol[order(rowSums(my.data.symbol[,c(2:24)]), decreasing = T),]
#removes duplicated rows
my.data.symbol <- my.data.symbol[which(duplicated(my.data.symbol[,1])==F),]
rownames(my.data.symbol) <- my.data.symbol[,1]
exp.data <- my.data.symbol[,2:24]
gene.lengths <- vector(length=nrow(exp.data))
for (index in 1:length(gene.lengths)) {
gene.lengths[index] <- as.numeric(exonic.gene.sizes[my.data.symbol[index,"Gene.stable.ID"]])
}
rpkm.data <- rpkm(exp.data, gene.length=gene.lengths, normalized.lib.sizes=TRUE, log=T, prior.count=.25)
rpkm.data <- cbind(Gene=rownames(rpkm.data), rpkm.data)
#write.table(cbind(Gene=rownames(rpkm.data), rpkm.data), "MAA_RPKM.txt", sep="\t", col.names=T, row.names=F)
# covariates
group <- sample.info[use.samples.rows,"Condition"]
group <- as.factor(ifelse(grepl("Adj",group)==T,0,1))
tissue <- sample.info[use.samples.rows,"Region"]
sex <- sample.info[use.samples.rows,"Sex"]
lane <- sample.info[use.samples.rows,"Lane"]
# edgeR on full
x <- as.matrix(exp.data[,use.samples])
rownames(x) <- rownames(exp.data)
y <- DGEList(counts=x,group=group)
keep <- rowSums(cpm(y)>min.cpm) >= cpm.sample.cutoff
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)
et <- exactTest(y)
#topTags(et)
sample.counts <- y$pseudo.counts
sample.counts <- cbind(Gene=rownames(sample.counts), sample.counts)
#Sex and tissue dissections are used as covariates
design <- model.matrix(~as.factor(sex)+ lane +as.factor(tissue)+as.factor(group))
y <- estimateGLMCommonDisp(y,design)
y <- estimateGLMTrendedDisp(y,design)
y <- estimateGLMTagwiseDisp(y,design)
norm.vals <- cpm(y)
fit <- glmFit(y,design)
lrt <- glmLRT(fit)
glm.output.table <- topTags(lrt, n=Inf)
#topTags(lrt, n=10)
#write.table(glm.output.table$table, "MAA_Full_DE.txt", sep="\t", col.names=T, row.names = T)
glm.output.table <- cbind(Gene=rownames(glm.output.table$table), glm.output.table$table)
colnames(glm.output.table) <- paste(colnames(glm.output.table), "All", sep=".")
colnames(glm.output.table)[1] <- "Gene"
test.counts <- y$pseudo.counts
pca.results <- prcomp(scale(log(test.counts+1), center=T, scale=T))
MDS_data <- plotMDS(y,plot = F)
MDS_data2 <- data.frame(Leading_logFC_dim_1 = MDS_data$x, Leading_logFC_dim_2=MDS_data$y)
cond <- sample.info[use.samples.rows,"Condition"]
MDS_data2 <- data.frame(Samples=rownames(MDS_data2),
MDS_data2,
Conditions_original = cond,
Conditions = ifelse(cond %in% c("Adj-M", "Adj-U"), "Adj", "Imm"),
Group=as.factor(ifelse(grepl("Adj",cond)==T,0,1)),
Lane = sample.info[use.samples.rows,"Lane"],
Region= sample.info[use.samples.rows,"Region"],
sex= sample.info[use.samples.rows,"Sex"])
rownames(MDS_data2) <- NULL
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, shape=Group, colour=Conditions))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, colour=as.character(Lane)))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, colour=as.character(Region)))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, colour=as.character(Region), shape=sex))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
rownames(glm.output.table) <- NULL
colnames(glm.output.table) <- c("gene_name", "logFC", "logCPM", "LR", "PValue", "FDR")
# Ns of DE genes
P_up_005 <- nrow(filter(glm.output.table, PValue < 0.05, logFC > 0))
P_down_005 <- nrow(filter(glm.output.table, PValue < 0.05, logFC < 0))
P_up_001 <- nrow(filter(glm.output.table, PValue < 0.01, logFC > 0))
P_down_001 <- nrow(filter(glm.output.table, PValue < 0.01, logFC < 0))
FDR_up <- nrow(filter(glm.output.table, FDR < 0.05, logFC > 0))
FDR_down <- nrow(filter(glm.output.table, FDR < 0.05, logFC < 0))
DE_df_n <- t(data.frame(
"PValue below 0.05" = c(P_up_005, P_down_005),
"PValue below 0.01" = c(P_up_001, P_down_001),
"FDR below 0.05" = c(FDR_up, FDR_down)))
colnames(DE_df_n) <- c("Upregulated", "Downregulated")
DE_df_n_melted <- melt(DE_df_n)
ggplot(DE_df_n_melted, aes(fill=Var2, x=Var1, y=value))+
geom_bar(position="dodge", stat="identity", color="black")+
geom_text(aes(label=value), position=position_dodge(width=0.9), vjust=-0.25, size = 8)+
theme_bw()+
scale_fill_manual(values=brewer.pal(n = 8, name = "Paired")[c(6,2)])+
theme(legend.title=element_blank())+
labs(title="Number of differentailly expressed genes", x="", y="n of DE genes")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.text=element_text(size=20))
#####
volcano_plot_text <- function(x) {
#x <- glm.output.table
significance_threshold <- 0.05
#This is nicely dealing with datasets with missing cathegories.
test <- ifelse(x$PValue, "Non significant")
test <- ifelse(x$PValue <0.05, "PValue < 0.05", test)
test <- ifelse(x$logFC > 1, "logFC > abs(1)", test)
test <- ifelse(x$logFC < -1, "logFC > abs(1)", test)
test <- ifelse(x$logFC > 1 & x$PValue <0.05, "PValue < 0.05 & logFC > abs(1)", test)
test <- ifelse(x$logFC < -1 & x$PValue <0.05, "PValue < 0.05 & logFC > abs(1)", test)
plotDat <- data.frame(x, Group=test)
ggplot(plotDat, aes(x = logFC, y=-log10(PValue), fill=Group, col=Group, label = gene_name)) +
geom_point(pch=21, alpha = 0.5, size = 2)+
theme_light()+
scale_fill_manual(values=c("Non significant"="black", "PValue < 0.05"="orange", "logFC > abs(1)"="green4", "PValue < 0.05 & logFC > abs(1)"="red"))+
scale_color_manual(values=c("Non significant"="black", "PValue < 0.05"="orange", "logFC > abs(1)"="green4", "PValue < 0.05 & logFC > abs(1)"="red"))+
labs(title= "")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(legend.text=element_text(size=16))+
theme(axis.text=element_text(size=14))+
theme(legend.title=element_blank())+
theme(axis.text=element_text(size=14))+
theme(axis.title = element_text(size=14, face = "bold"))+
theme(legend.position="bottom")
#scale_x_continuous(limits = c(-7.5, 7.5))
}
p <- volcano_plot_text(glm.output.table)
ggplotly(p) %>%
layout(legend = list(
orientation = "h",y = -0.1
)
)
datatable(glm.output.table, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T) )
#save.image("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/MAA_workspace.RData")
#load("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/MAA_workspace.RData")
## Genes with PValue < 0.05
GO_analysis_0.05 <- function(q, b, c) {
library(topGO)
background.genes <- q$gene_name
geneUniverse <- background.genes
if(b=="upregulated"){
test.genes <- filter(q, PValue<0.05, logFC > 0)$gene_name
} else if (b=="downregulated"){
test.genes <- filter(q, PValue<0.05, logFC < 0)$gene_name
} else {
print("Incorect fold change parameter")
stop()
}
genesOfInterest <- test.genes
geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
names(geneList) <- geneUniverse
myGOdata <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList, annot=annFUN.org, mapping="org.Rn.eg.db", ID = "alias", nodeSize=20)
print(myGOdata)
resultFisher <- runTest(myGOdata, algorithm = "weight01", statistic = "fisher")
#resultKS <- runTest(myGOdata, algorithm = "classic", statistic = "ks")
#resultKS.elim <- runTest(myGOdata, algorithm = "elim", statistic = "ks")
#classicKS = resultKS, elimKS = resultKS.elim, - add later
allRes <- GenTable(myGOdata, classicFisher = resultFisher, orderBy = "classicFisher", topNodes = c)
#showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all')
#nodes_plot <- recordPlot(showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all'))
padding = 5
allRes$Enrichment <- log2((allRes$Significant + padding)/(allRes$Expected + padding))
#Building a df of DE genes belonging to top 20 GO BP caegories
DE_genes_in_top_GO_cat <- function(r){
fisher.go <- allRes[r,1]
#print(allRes[x,c(1,2)])
fisher.ann.genes <- genesInTerm(myGOdata, whichGO=fisher.go)
df <- data.frame(GO.ID = allRes[r,c(1)], Term = allRes[r,c(2)], gene_name=intersect(as.character(fisher.ann.genes[[1]]), q$gene_name))
df <- filter(df, gene_name %in% test.genes)
df
}
list(allRes, lapply(1:nrow(allRes), DE_genes_in_top_GO_cat))
}
## Genes with PValue < 0.01
GO_analysis_0.01 <- function(q, b, c) {
library(topGO)
background.genes <- q$gene_name
geneUniverse <- background.genes
if(b=="upregulated"){
test.genes <- filter(q, PValue<0.01, logFC > 0)$gene_name
} else if (b=="downregulated"){
test.genes <- filter(q, PValue<0.01, logFC < 0)$gene_name
} else {
print("Incorect fold change parameter")
stop()
}
genesOfInterest <- test.genes
geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
names(geneList) <- geneUniverse
myGOdata <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList, annot=annFUN.org, mapping="org.Rn.eg.db", ID = "alias", nodeSize=20)
print(myGOdata)
resultFisher <- runTest(myGOdata, algorithm = "weight01", statistic = "fisher")
#resultKS <- runTest(myGOdata, algorithm = "classic", statistic = "ks")
#resultKS.elim <- runTest(myGOdata, algorithm = "elim", statistic = "ks")
#classicKS = resultKS, elimKS = resultKS.elim, - add later
allRes <- GenTable(myGOdata, classicFisher = resultFisher, orderBy = "classicFisher", topNodes = c)
#showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all')
#nodes_plot <- recordPlot(showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all'))
padding = 5
allRes$Enrichment <- log2((allRes$Significant + padding)/(allRes$Expected + padding))
#Building a df of DE genes belonging to top 20 GO BP caegories
DE_genes_in_top_GO_cat <- function(r){
fisher.go <- allRes[r,1]
#print(allRes[x,c(1,2)])
fisher.ann.genes <- genesInTerm(myGOdata, whichGO=fisher.go)
df <- data.frame(GO.ID = allRes[r,c(1)], Term = allRes[r,c(2)], gene_name=intersect(as.character(fisher.ann.genes[[1]]), q$gene_name))
df <- filter(df, gene_name %in% test.genes)
df
}
list(allRes, lapply(1:nrow(allRes), DE_genes_in_top_GO_cat))
}
GO_up_005 <- GO_analysis_0.05(glm.output.table, "upregulated", 3790)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 10024 available genes (all genes from the array):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 417 significant genes.
##
## 8985 feasible genes (genes that can be used in the analysis):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 387 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3799
## - number of edges = 8394
##
## ------------------------- topGOdata object -------------------------
GO_down_005 <- GO_analysis_0.05(glm.output.table, "downregulated", 3790)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 10024 available genes (all genes from the array):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 316 significant genes.
##
## 8985 feasible genes (genes that can be used in the analysis):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 272 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3799
## - number of edges = 8394
##
## ------------------------- topGOdata object -------------------------
GO_up_001 <- GO_analysis_0.01(glm.output.table, "upregulated", 3790)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 10024 available genes (all genes from the array):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 131 significant genes.
##
## 8985 feasible genes (genes that can be used in the analysis):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 121 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3799
## - number of edges = 8394
##
## ------------------------- topGOdata object -------------------------
GO_down_001 <- GO_analysis_0.01(glm.output.table, "downregulated", 3790)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 10024 available genes (all genes from the array):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 76 significant genes.
##
## 8985 feasible genes (genes that can be used in the analysis):
## - symbol: Hes7 Bhlhe22 Wdr63 Zic4 Tuba1a ...
## - 63 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3799
## - number of edges = 8394
##
## ------------------------- topGOdata object -------------------------
#head(GO_up_005[[1]])
x_1 <- filter(GO_up_005[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_1, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_up_005[[2]]), Term %in% x_1$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
x_2 <- filter(GO_down_005[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_2, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_down_005[[2]]), Term %in% x_2$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
x_3 <- filter(GO_up_001[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_3, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_up_001[[2]]), Term %in% x_3$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
x_4 <- filter(GO_down_001[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_4, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_down_001[[2]]), Term %in% x_4$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
Dataset includes: “Extracted frontal cortex only”, and “Forebrain” samples
setwd("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/RNA")
# details on each sample
sample.info <- read.table("MAA_SampleInfo_complete.txt", sep="\t", header=T, as.is=T, stringsAsFactors = F)
#In addition to frontal cortex I'm adding Forebrain samples. There are no frontal cortex samples in the new batch. I had to manually add 0s to the outlier columns, because I haven't found the original information in metafiles yet.
#There are the following brain regions in the dataset:
#[1] "frontal cortex" "whole brain"
#[3] "Extracted frontal cortex only" "Forebrain"
# select samples for analysis
use.samples_2 <- sample.info[which(sample.info[,"Region"] %in% c("frontal cortex", "Extracted frontal cortex only") & sample.info[,"Outlier"]==0),"Sample_ID"]
use.samples_2.rows <- which(sample.info[,"Region"] %in% c("frontal cortex", "Extracted frontal cortex only") & sample.info[,"Outlier"]==0)
# run analysis
exp.data_2 <- my.data
### sanity check???
#head(exp.data_2)
#write.table(my.data, "MAA_GeneCounts.txt", sep="/t", col.names = T, row.names = F, quote=F)
rownames(exp.data_2) <- as.character(exp.data_2[,1])
exp.data_2 <- exp.data_2[,-1]
exp.data_2 <- exp.data_2[,use.samples_2] # selects data by column names
setwd("G:/Shared drives/Nord Lab - Data/MIAx01/RNA")
# merge symbol to ENS ID and run
ENS.to.Symbol <- read.table("Rn6.ENSEMBL.Symbol.txt", sep="\t", header=T, as.is = T, stringsAsFactors = F, comment.char = "#", quote = "")
#Some reformating was needed
colnames(ENS.to.Symbol)[3] <- "gene_name"
my.data$gene_name <- rownames(my.data)
my.data.symbol <- merge(my.data, ENS.to.Symbol[,c(1,3)], by="gene_name")
my.data.symbol$gene_name <- NULL
#head(my.data.symbol)
my.data.symbol <- my.data.symbol[order(rowSums(my.data.symbol[,c(2:24)]), decreasing = T),]
#removes duplicated rows
my.data.symbol <- my.data.symbol[which(duplicated(my.data.symbol[,1])==F),]
rownames(my.data.symbol) <- my.data.symbol[,1]
exp.data_2 <- my.data.symbol[,2:24]
gene.lengths <- vector(length=nrow(exp.data_2))
for (index in 1:length(gene.lengths)) {
gene.lengths[index] <- as.numeric(exonic.gene.sizes[my.data.symbol[index,"Gene.stable.ID"]])
}
rpkm.data_2 <- rpkm(exp.data_2, gene.length=gene.lengths, normalized.lib.sizes=TRUE, log=T, prior.count=.25)
rpkm.data_2 <- cbind(Gene=rownames(rpkm.data_2), rpkm.data_2)
#write.table(cbind(Gene=rownames(rpkm.data_2), rpkm.data_2), "MAA_RPKM.txt", sep="\t", col.names=T, row.names=F)
# covariates
group <- sample.info[use.samples_2.rows,"Condition"]
group <- as.factor(ifelse(grepl("Adj",group)==T,0,1))
tissue <- sample.info[use.samples_2.rows,"Region"]
sex <- sample.info[use.samples_2.rows,"Sex"]
lane <- sample.info[use.samples_2.rows,"Lane"]
# edgeR on full
x <- as.matrix(exp.data_2[,use.samples_2])
rownames(x) <- rownames(exp.data_2)
y <- DGEList(counts=x,group=group)
keep <- rowSums(cpm(y)>min.cpm) >= cpm.sample.cutoff
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)
et <- exactTest(y)
#topTags(et)
sample.counts <- y$pseudo.counts
sample.counts <- cbind(Gene=rownames(sample.counts), sample.counts)
design <- model.matrix(~as.factor(sex)+as.factor(tissue)+lane+as.factor(group))
y <- estimateGLMCommonDisp(y,design)
y <- estimateGLMTrendedDisp(y,design)
y <- estimateGLMTagwiseDisp(y,design)
norm.vals <- cpm(y)
fit <- glmFit(y,design)
lrt <- glmLRT(fit)
glm.output.table_2 <- topTags(lrt, n=Inf)
#topTags(lrt, n=10)
#write.table(glm.output.table_2$table, "MAA_Full_DE.txt", sep="\t", col.names=T, row.names = T)
glm.output.table_2 <- cbind(Gene=rownames(glm.output.table_2$table), glm.output.table_2$table)
colnames(glm.output.table_2) <- paste(colnames(glm.output.table_2), "All", sep=".")
colnames(glm.output.table_2)[1] <- "Gene"
test.counts <- y$pseudo.counts
pca.results <- prcomp(scale(log(test.counts+1), center=T, scale=T))
MDS_data <- plotMDS(y,plot = F)
MDS_data2 <- data.frame(Leading_logFC_dim_1 = MDS_data$x, Leading_logFC_dim_2=MDS_data$y)
cond <- sample.info[use.samples_2.rows,"Condition"]
MDS_data2 <- data.frame(Samples=rownames(MDS_data2),
MDS_data2,
Conditions_original = cond,
Conditions = ifelse(cond %in% c("Adj-M", "Adj-U"), "Adj", "Imm"),
Group=as.factor(ifelse(grepl("Adj",cond)==T,0,1)),
Lane = sample.info[use.samples_2.rows,"Lane"],
Region= sample.info[use.samples_2.rows,"Region"],
sex= sample.info[use.samples_2.rows,"Sex"])
rownames(MDS_data2) <- NULL
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, shape=group, colour=Conditions))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, colour=as.character(Lane)))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, colour=as.character(Region)))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
MDS_colored_DPC <- ggplot(MDS_data2, aes(x=Leading_logFC_dim_1, y=Leading_logFC_dim_2, colour=as.character(Region), shape=sex))+
geom_point(size=3, alpha=0.6)+
theme_bw()+
labs(title = "MDS plot")+
theme(plot.title = element_text(size = rel(1.5), hjust=0.5))
MDS_colored_DPC
rownames(glm.output.table_2) <- NULL
colnames(glm.output.table_2) <- c("gene_name", "logFC", "logCPM", "LR", "PValue", "FDR")
# Ns of DE genes
P_up_005 <- nrow(filter(glm.output.table_2, PValue < 0.05, logFC > 0))
P_down_005 <- nrow(filter(glm.output.table_2, PValue < 0.05, logFC < 0))
P_up_001 <- nrow(filter(glm.output.table_2, PValue < 0.01, logFC > 0))
P_down_001 <- nrow(filter(glm.output.table_2, PValue < 0.01, logFC < 0))
FDR_up <- nrow(filter(glm.output.table_2, FDR < 0.05, logFC > 0))
FDR_down <- nrow(filter(glm.output.table_2, FDR < 0.05, logFC < 0))
DE_df_n <- t(data.frame(
"PValue below 0.05" = c(P_up_005, P_down_005),
"PValue below 0.01" = c(P_up_001, P_down_001),
"FDR below 0.05" = c(FDR_up, FDR_down)))
colnames(DE_df_n) <- c("Upregulated", "Downregulated")
DE_df_n_melted <- melt(DE_df_n)
ggplot(DE_df_n_melted, aes(fill=Var2, x=Var1, y=value))+
geom_bar(position="dodge", stat="identity", color="black")+
geom_text(aes(label=value), position=position_dodge(width=0.9), vjust=-0.25, size = 8)+
theme_bw()+
scale_fill_manual(values=brewer.pal(n = 8, name = "Paired")[c(6,2)])+
theme(legend.title=element_blank())+
labs(title="Number of differentailly expressed genes", x="", y="n of DE genes")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.text=element_text(size=20))
#####
volcano_plot_text <- function(x) {
#x <- glm.output.table_2
significance_threshold <- 0.05
#This is nicely dealing with datasets with missing cathegories.
test <- ifelse(x$PValue, "Non significant")
test <- ifelse(x$PValue <0.05, "PValue < 0.05", test)
test <- ifelse(x$logFC > 1, "logFC > abs(1)", test)
test <- ifelse(x$logFC < -1, "logFC > abs(1)", test)
test <- ifelse(x$logFC > 1 & x$PValue <0.05, "PValue < 0.05 & logFC > abs(1)", test)
test <- ifelse(x$logFC < -1 & x$PValue <0.05, "PValue < 0.05 & logFC > abs(1)", test)
plotDat <- data.frame(x, Group=test)
ggplot(plotDat, aes(x = logFC, y=-log10(PValue), fill=Group, col=Group, label = gene_name)) +
geom_point(pch=21, alpha = 0.5, size = 2)+
theme_light()+
scale_fill_manual(values=c("Non significant"="black", "PValue < 0.05"="orange", "logFC > abs(1)"="green4", "PValue < 0.05 & logFC > abs(1)"="red"))+
scale_color_manual(values=c("Non significant"="black", "PValue < 0.05"="orange", "logFC > abs(1)"="green4", "PValue < 0.05 & logFC > abs(1)"="red"))+
labs(title= "")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(legend.text=element_text(size=16))+
theme(axis.text=element_text(size=14))+
theme(legend.title=element_blank())+
theme(axis.text=element_text(size=14))+
theme(axis.title = element_text(size=14, face = "bold"))+
theme(legend.position="bottom")
#scale_x_continuous(limits = c(-7.5, 7.5))
}
p <- volcano_plot_text(glm.output.table_2)
ggplotly(p) %>%
layout(legend = list(
orientation = "h",y = -0.1
)
)
datatable(glm.output.table_2, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T) )
#save.image("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/MAA_workspace.RData")
#load("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/MAA_workspace.RData")
## Genes with PValue < 0.05
GO_analysis_0.05 <- function(q, b, c) {
library(topGO)
background.genes <- q$gene_name
geneUniverse <- background.genes
if(b=="upregulated"){
test.genes <- filter(q, PValue<0.05, logFC > 0)$gene_name
} else if (b=="downregulated"){
test.genes <- filter(q, PValue<0.05, logFC < 0)$gene_name
} else {
print("Incorect fold change parameter")
stop()
}
genesOfInterest <- test.genes
geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
names(geneList) <- geneUniverse
myGOdata <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList, annot=annFUN.org, mapping="org.Rn.eg.db", ID = "alias", nodeSize=20)
print(myGOdata)
resultFisher <- runTest(myGOdata, algorithm = "weight01", statistic = "fisher")
#resultKS <- runTest(myGOdata, algorithm = "classic", statistic = "ks")
#resultKS.elim <- runTest(myGOdata, algorithm = "elim", statistic = "ks")
#classicKS = resultKS, elimKS = resultKS.elim, - add later
allRes <- GenTable(myGOdata, classicFisher = resultFisher, orderBy = "classicFisher", topNodes = c)
#showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all')
#nodes_plot <- recordPlot(showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all'))
padding = 5
allRes$Enrichment <- log2((allRes$Significant + padding)/(allRes$Expected + padding))
#Building a df of DE genes belonging to top 20 GO BP caegories
DE_genes_in_top_GO_cat <- function(r){
fisher.go <- allRes[r,1]
#print(allRes[x,c(1,2)])
fisher.ann.genes <- genesInTerm(myGOdata, whichGO=fisher.go)
df <- data.frame(GO.ID = allRes[r,c(1)], Term = allRes[r,c(2)], gene_name=intersect(as.character(fisher.ann.genes[[1]]), q$gene_name))
df <- filter(df, gene_name %in% test.genes)
df
}
list(allRes, lapply(1:nrow(allRes), DE_genes_in_top_GO_cat))
}
## Genes with PValue < 0.01
GO_analysis_0.01 <- function(q, b, c) {
library(topGO)
background.genes <- q$gene_name
geneUniverse <- background.genes
if(b=="upregulated"){
test.genes <- filter(q, PValue<0.01, logFC > 0)$gene_name
} else if (b=="downregulated"){
test.genes <- filter(q, PValue<0.01, logFC < 0)$gene_name
} else {
print("Incorect fold change parameter")
stop()
}
genesOfInterest <- test.genes
geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
names(geneList) <- geneUniverse
myGOdata <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList, annot=annFUN.org, mapping="org.Rn.eg.db", ID = "alias", nodeSize=20)
print(myGOdata)
resultFisher <- runTest(myGOdata, algorithm = "weight01", statistic = "fisher")
#resultKS <- runTest(myGOdata, algorithm = "classic", statistic = "ks")
#resultKS.elim <- runTest(myGOdata, algorithm = "elim", statistic = "ks")
#classicKS = resultKS, elimKS = resultKS.elim, - add later
allRes <- GenTable(myGOdata, classicFisher = resultFisher, orderBy = "classicFisher", topNodes = c)
#showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all')
#nodes_plot <- recordPlot(showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all'))
padding = 5
allRes$Enrichment <- log2((allRes$Significant + padding)/(allRes$Expected + padding))
#Building a df of DE genes belonging to top 20 GO BP caegories
DE_genes_in_top_GO_cat <- function(r){
fisher.go <- allRes[r,1]
#print(allRes[x,c(1,2)])
fisher.ann.genes <- genesInTerm(myGOdata, whichGO=fisher.go)
df <- data.frame(GO.ID = allRes[r,c(1)], Term = allRes[r,c(2)], gene_name=intersect(as.character(fisher.ann.genes[[1]]), q$gene_name))
df <- filter(df, gene_name %in% test.genes)
df
}
list(allRes, lapply(1:nrow(allRes), DE_genes_in_top_GO_cat))
}
GO_up_005 <- GO_analysis_0.05(glm.output.table_2, "upregulated", 3777)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 9975 available genes (all genes from the array):
## - symbol: Ccdc153 Rpl12 Gfap Dynlrb2 Rps8 ...
## - 1167 significant genes.
##
## 8941 feasible genes (genes that can be used in the analysis):
## - symbol: Rpl12 Gfap Dynlrb2 Rps8 Ak7 ...
## - 1032 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3790
## - number of edges = 8375
##
## ------------------------- topGOdata object -------------------------
GO_down_005 <- GO_analysis_0.05(glm.output.table_2, "downregulated", 3777)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 9975 available genes (all genes from the array):
## - symbol: Ccdc153 Rpl12 Gfap Dynlrb2 Rps8 ...
## - 1058 significant genes.
##
## 8941 feasible genes (genes that can be used in the analysis):
## - symbol: Rpl12 Gfap Dynlrb2 Rps8 Ak7 ...
## - 969 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3790
## - number of edges = 8375
##
## ------------------------- topGOdata object -------------------------
GO_up_001 <- GO_analysis_0.01(glm.output.table_2, "upregulated", 3777)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 9975 available genes (all genes from the array):
## - symbol: Ccdc153 Rpl12 Gfap Dynlrb2 Rps8 ...
## - 538 significant genes.
##
## 8941 feasible genes (genes that can be used in the analysis):
## - symbol: Rpl12 Gfap Dynlrb2 Rps8 Ak7 ...
## - 479 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3790
## - number of edges = 8375
##
## ------------------------- topGOdata object -------------------------
GO_down_001 <- GO_analysis_0.01(glm.output.table_2, "downregulated", 3777)
##
## ------------------------- topGOdata object -------------------------
##
## Description:
## - My project
##
## Ontology:
## - BP
##
## 9975 available genes (all genes from the array):
## - symbol: Ccdc153 Rpl12 Gfap Dynlrb2 Rps8 ...
## - 554 significant genes.
##
## 8941 feasible genes (genes that can be used in the analysis):
## - symbol: Rpl12 Gfap Dynlrb2 Rps8 Ak7 ...
## - 520 significant genes.
##
## GO graph (nodes with at least 20 genes):
## - a graph with directed edges
## - number of nodes = 3790
## - number of edges = 8375
##
## ------------------------- topGOdata object -------------------------
#head(GO_up_005[[1]])
x_1 <- filter(GO_up_005[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_1, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_up_005[[2]]), Term %in% x_1$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
x_2 <- filter(GO_down_005[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_2, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_down_005[[2]]), Term %in% x_2$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
x_3 <- filter(GO_up_001[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_3, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_up_001[[2]]), Term %in% x_3$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
x_4 <- filter(GO_down_001[[1]], classicFisher < 0.05, Enrichment > 0)
datatable(x_4, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
datatable(filter(rbindlist(GO_down_001[[2]]), Term %in% x_4$Term), rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
rpkm.data_linear <- rpkm(exp.data, gene.length=gene.lengths, log=F)
sample.info_clean <- sample.info[use.samples.rows,]
sample.info$Condition2 <- as.character(ifelse(grepl("Adj",sample.info$Condition)==T, "Adj","Imm"))
#This is some ugly coding, but works correctly.
ed <- rpkm(exp.data[,use.samples], gene.length=gene.lengths, log=F)
rpkm.batch <- removeBatchEffect(ed,
batch = sample.info$Region[use.samples.rows],
design = cbind(
sample.info$Lane[use.samples.rows],
as.factor(sample.info$Sex)[use.samples.rows],
as.factor(sample.info$Condition2)[use.samples.rows]))
rpkm.data_linear_clean <- rpkm.batch
#rpkm.data_linear_clean <- rpkm.data_linear[,use.samples]
sample.info_clean$Condition2 <- as.character(ifelse(grepl("Adj",sample.info_clean$Condition)==T, "Adj","Imm"))
rpkm_box_plot <- function(x){
rpkm_test <- as.data.frame(melt(rpkm.data_linear_clean[x,]))
rpkm_test_w_info <- cbind(rpkm_test, sample.info_clean[,"Condition2"])
rpkm_test_w_info <- cbind(rpkm_test_w_info, sample.info_clean[,"Region"])
colnames(rpkm_test_w_info) <- c("RPKM", "Condition", "Region")
#j_brew_colors <- brewer.pal(n = 8, name = "Paired")[c(2,6)]
j_brew_colors <- c("#1F78B4", "#33A02C", "#E31A1C")
ggplot(rpkm_test_w_info)+
geom_boxplot(aes(x = Condition, y= RPKM, colour=Condition), alpha=0.3, position="identity")+
geom_jitter(aes(x = Condition, y= RPKM, fill=Region), size=4, alpha=0.6, pch=21, width = 0.2)+
theme_bw()+
geom_smooth(method = "loess", se=T, aes(x = Condition, y= RPKM, fill=Condition, group=Condition), size = 1, alpha=0.1)+
theme(axis.text.x=element_text(angle=50, vjust=0.9, hjust=1, size=12, face="bold"))+
labs(title= x, x="")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
scale_fill_manual(values = j_brew_colors)+
scale_colour_manual(values = brewer.pal(n = 8, name = "Paired")[c(2,6, 3)])+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.text = element_text(size=20),
legend.title = element_text(size=20))
}
#rpkm_box_plot(glm.output.table_2$gene_name[5])
pl <- lapply(glm.output.table$gene_name[1:30], function(x) rpkm_box_plot(x))
ml <- marrangeGrob(pl, nrow=10, ncol=3, top = "")
ml
#save.image("G:/Shared drives/Nord Lab - Personal Folders/Karol/MIAx01/index.RData")
rpkm.data_linear_2 <- rpkm(exp.data_2, gene.length=gene.lengths, log=F)
#This is some ugly coding, but works correctly.
ed <- rpkm(exp.data_2[,use.samples_2], gene.length=gene.lengths, log=F)
rpkm.batch <- removeBatchEffect(ed,
batch = sample.info_clean$Region[use.samples_2.rows],
design = cbind(
sample.info_clean$Lane[use.samples_2.rows],
as.factor(sample.info_clean$Sex)[use.samples_2.rows],
as.factor(sample.info_clean$Condition2)[use.samples_2.rows]))
rpkm.data_linear_clean <- rpkm.batch
#rpkm.data_linear_clean <- rpkm.data_linear_2[,use.samples_2]
sample.info_clean <- sample.info[use.samples_2.rows,]
sample.info_clean$Condition2 <- as.character(ifelse(grepl("Adj",sample.info_clean$Condition)==T, "Adj","Imm"))
rpkm_box_plot <- function(x){
rpkm_test <- as.data.frame(melt(rpkm.data_linear_clean[x,]))
rpkm_test_w_info <- cbind(rpkm_test, sample.info_clean[,"Condition2"])
rpkm_test_w_info <- cbind(rpkm_test_w_info, sample.info_clean[,"Region"])
colnames(rpkm_test_w_info) <- c("RPKM", "Condition", "Region")
#j_brew_colors <- brewer.pal(n = 8, name = "Paired")[c(2,6)]
j_brew_colors <- c("#1F78B4", "#33A02C", "#E31A1C")
ggplot(rpkm_test_w_info)+
geom_boxplot(aes(x = Condition, y= RPKM, colour=Condition), alpha=0.3, position="identity")+
geom_jitter(aes(x = Condition, y= RPKM, fill=Region), size=4, alpha=0.6, pch=21, width = 0.2)+
theme_bw()+
geom_smooth(method = "loess", se=T, aes(x = Condition, y= RPKM, fill=Condition, group=Condition), size = 1, alpha=0.1)+
theme(axis.text.x=element_text(angle=50, vjust=0.9, hjust=1, size=12, face="bold"))+
labs(title= x, x="")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
scale_fill_manual(values = j_brew_colors)+
scale_colour_manual(values = brewer.pal(n = 8, name = "Paired")[c(2,6, 3)])+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.text = element_text(size=20),
legend.title = element_text(size=20))
}
#rpkm_box_plot(glm.output.table_2$gene_name[5])
#colnames(glm.output.table) <- c("gene_name", "logFC", "logCPM", "LR", "PValue", "FDR")
pl <- lapply(glm.output.table_2$gene_name[1:30], function(x) rpkm_box_plot(x))
ml <- marrangeGrob(pl, nrow=10, ncol=3, top = "")
ml